# Implementation of the Fast Cauchy Transform from
# "The Fast Cauchy Transform and Faster Robust Linear Regression", Clarkson et al., 2012.
import numpy as np
from scipy.linalg import hadamard

def next_power_of_2(x):
    """Return the smallest power of two that is >= ``x``.

    Intended for non-negative integers; ``0`` maps to ``1``.
    """
    if x == 0:
        return 1
    # (x - 1).bit_length() is the exponent of the first power of two >= x.
    exponent = (x - 1).bit_length()
    return 1 << exponent



def getFCT1(A, r1, s):
    """Compute a conditioned basis ``A R^{-1}`` using the FCT1 sketch.

    The sketch is ``Pi = 4 * B * C * H~`` where

    * ``H~`` is block diagonal; each block stacks the normalized ``s x s``
      Hadamard matrix on top of the ``s x s`` identity (so ``H~ A`` has twice
      as many rows as ``A``),
    * ``C`` is diagonal with i.i.d. standard Cauchy entries,
    * ``B`` has exactly one ``1`` per column, in a uniformly random row.

    ``R`` is the triangular factor of a QR decomposition of ``Pi A``.

    Parameters
    ----------
    A : array_like, shape (n, d)
        Input matrix. If ``n`` is not a multiple of ``s`` the rows are
        zero-padded up to the next multiple before sketching.
    r1 : int
        Number of rows of the sketch ``Pi A``.
    s : int
        Hadamard block size; must be a power of two (``scipy.linalg.hadamard``
        raises ``ValueError`` otherwise).

    Returns
    -------
    numpy.ndarray, shape (n, min(r1, d))
        ``A[:, :k] @ inv(R[:k, :k])`` with ``k = min(r1, d)``. When
        ``r1 >= d`` this is ``A @ inv(R)``; when ``r1 < d`` only the leading
        ``r1`` columns of ``A`` are used.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the leading ``k x k`` block of ``R`` is singular.

    Notes
    -----
    Randomness comes from the global ``numpy.random`` state.
    """
    print('Start FCT1')
    A = np.asarray(A)
    n, d = np.shape(A)

    # Built first so that an invalid block size fails before any other work.
    Hs = hadamard(s) / np.sqrt(s)

    # Zero-pad to a whole number of s-row blocks so that no row of A is left
    # out of the sketch; zero rows do not change A x for any x.
    num_blocks = -(-n // s)
    n_pad = num_blocks * s
    padded = np.zeros((n_pad, d))
    padded[:n, :] = A
    blocks = padded.reshape(num_blocks, s, d)

    # H~ A: for every block, the Hadamard-mixed rows followed by the raw rows.
    mixed = np.matmul(Hs, blocks)
    tildeHdotA = np.concatenate((mixed, blocks), axis=1).reshape(2 * n_pad, d)

    # B C: exactly one Cauchy draw per column, placed in a random row.
    BC = np.zeros((r1, 2 * n_pad))
    randomr1 = np.random.randint(0, r1, size=2 * n_pad)
    cauchys = np.random.standard_cauchy(size=2 * n_pad)
    BC[randomr1, np.arange(2 * n_pad)] = cauchys

    PiA = 4 * BC.dot(tildeHdotA)
    print('PiA done')

    # Only the triangular factor is needed.
    R = np.linalg.qr(PiA, mode='r')
    print('QR done')

    # R is (min(r1, d), d); invert its leading square block.
    k = min(r1, d)
    Rinv = np.linalg.inv(R[:k, :k])
    print('Rinv done')

    # Equivalent to A @ vstack([Rinv, zeros((d - k, k))]).
    return np.dot(A[:, :k], Rinv)


# FCT as described in Sec 3.1 of https://arxiv.org/pdf/1207.4684.pdf
def getFCT1old(A, r1, s):
    """Loop-based variant of the FCT1 conditioning step.

    Builds ``Pi A`` with ``Pi = 4 * B * C * H~`` and returns ``A`` multiplied
    by the inverse of the triangular QR factor of ``Pi A``. ``H~`` is block
    diagonal with blocks ``[H_s; I_s]`` (normalized Hadamard stacked on the
    identity), ``C`` is a diagonal of standard Cauchy draws and ``B`` places
    each column in one uniformly random row.

    Parameters
    ----------
    A : array_like, shape (n, d)
        Input matrix. Rows are zero-padded to a multiple of ``s``.
    r1 : int
        Number of rows of the sketch ``Pi A``.
    s : int
        Hadamard block size; must be a power of two (``scipy.linalg.hadamard``
        raises ``ValueError`` otherwise).

    Returns
    -------
    numpy.ndarray, shape (n, min(r1, d))
        ``A[:, :k] @ inv(R[:k, :k])`` with ``k = min(r1, d)``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the leading ``k x k`` block of ``R`` is singular.

    Notes
    -----
    Randomness comes from the global ``numpy.random`` state.
    """
    print('Start FCT1')
    A = np.array(A)
    n, d = np.shape(A)

    # Built first so that an invalid block size fails before any other work.
    Hs = hadamard(s) / np.sqrt(s)

    # Round the row count up to a whole number of blocks; the extra rows are
    # zeros and therefore do not affect A x.
    n_blocks = -(-n // s)
    n_pad = n_blocks * s
    A_pad = np.zeros((n_pad, d))
    A_pad[:n, :] = A

    # Each s-row block of A produces 2*s output rows: H_s @ block, then block.
    tildeHdotA = np.zeros((2 * n_pad, d))
    for ii in range(n_blocks):
        src = ii * s
        dst = 2 * ii * s
        block = A_pad[src:src + s, :]
        tildeHdotA[dst:dst + s, :] = np.dot(Hs, block)
        tildeHdotA[dst + s:dst + 2 * s, :] = block

    # One non-zero Cauchy entry per column of BC, in a random row.
    BC = np.zeros((r1, 2 * n_pad))
    randomr1 = np.random.randint(0, r1, size=2 * n_pad)
    cauchys = np.random.standard_cauchy(size=2 * n_pad)
    for i in range(2 * n_pad):
        BC[randomr1[i], i] = cauchys[i]

    PiA = 4 * np.dot(BC, tildeHdotA)
    print('PiA done')

    # Only the triangular factor is needed.
    R = np.linalg.qr(PiA, mode='r')
    print('QR done')

    # R is (min(r1, d), d); invert its leading square block.
    k = min(r1, d)
    Rinv = np.linalg.inv(R[:k, :k])
    print('Rinv done')

    # Equivalent to A @ vstack([Rinv, zeros((d - k, k))]).
    return np.dot(A[:, :k], Rinv)
